***PROVIDES DISTRIBUTION PLOTS OF STUDENT SHARES BOTH PRE AND POST-ASPIRE

* ---------------------------------------------------------------------------
* Session setup: reset Stata, open the log, set the working directory and
* load the student-level analysis file.
* ---------------------------------------------------------------------------
clear all
* set mem only has an effect on Stata 11 and earlier; newer versions ignore it.
set mem 2g
set more off
cap log close
log using "C:\teacher free riding\distribution_test.smcl", replace

* Set the working directory once. Relative paths used later in this file
* (teacher_linked_ids, the exported .eps graphs) resolve against it.
cd "C:\teacher free riding\"
set seed 7563543

* Load the analysis file directly. The earlier version first loaded
* grade_percentages.dta and then immediately replaced it with this file;
* that load was dead work (grade_percentages.dta is merged in further down).
use "C:\teacher free riding\hisd_data_freeriding_b.dta", clear

* Order observations by school, year and grade.
sort campus year grade

**IDENTIFY THE MAXIMUM AWARD FOR A TEACHER
* 5000 in 2006-2007, 7000 in 2008-2009, 0 in every other year.
gen maxaward = 0
replace maxaward = 5000 if inlist(year, 2006, 2007)
replace maxaward = 7000 if inlist(year, 2008, 2009)

* Indicator for 2006 and later (a missing year also evaluates to 1 in Stata).
gen post = (year >= 2006)

* Declare the panel: rows without an id cannot be part of it.
drop if id == .
xtset id year

* Enrollment = number of rows per campus-year, plus its 2nd-4th powers.
* unit is rebuilt as a constant 1 so the sum is a simple row count.
drop unit
gen unit = 1
egen enroll = sum(unit), by(campus year)
forvalues p = 2/4 {
    gen enroll`p' = enroll^`p'
}

***NOTE THAT 9TH GRADE SCIENCE AND SOC ARE STANFORD RATHER THAN TAKS IN ASPIRE****
***FOR INITIAL ANALYSIS DO ONLY MATH & READING FOR ALL GRADES (TAKS) AND LANGUAGE FOR ALL GRADES (STANF)
** AND SCIENCE & SOC FOR GRADES 10 & 11. FOR ACROSS-DEPARTMENT ANALYSIS, LIMIT TO GRADES 10 & 11****


*ACHIEVEMENT LAGS
* One-year panel lags (relies on the xtset id year declaration above).
foreach subj in math read {
    gen ltaks_sd_scale_min_`subj' = L.taks_sd_scale_min_`subj'
}
foreach subj in math read lang socialstu science {
    gen lstanford_sd_`subj'_scale = L.stanford_sd_`subj'_scale
}


*NO SCIENCE & SOC IN 10TH GRADE SO USE 8TH GRADE LAG
* For grades 10 and 11 both lag variables start at 0 (missing for other
* grades). Grade 11 then takes the one-year lag and grade 10 the two-year
* lag, so exactly one of the pair carries a score for each grade.
foreach subj in sci soc {
    gen ltaks_sd_scale_min_`subj' = 0 if inlist(grade, 10, 11)
    replace ltaks_sd_scale_min_`subj' = L.taks_sd_scale_min_`subj' if grade == 11
    gen l2taks_sd_scale_min_`subj' = 0 if inlist(grade, 10, 11)
    replace l2taks_sd_scale_min_`subj' = L2.taks_sd_scale_min_`subj' if grade == 10
}
 
 *GENERATE PRE-LAGS FROM 2004, THEN FROM 2003
 * For each base year the score is pulled with a fixed default lag
 * (1 for base 2004, 2 for base 2003), which lands on the base year for
 * 2005 rows. Rows from 2007-2009 are then overwritten with the lag that
 * reaches back to the base year (year - base). Rows from any other year
 * keep the default lag, so for them the value is NOT the base-year score.
foreach base in 2004 2003 {
	local default_lag = 2005 - `base'

	* TAKS: math and reading
	foreach subj in math read {
		gen taks_`subj'_`base' = l`default_lag'.taks_sd_scale_min_`subj'
		forvalues yr = 2007/2009 {
			local k = `yr' - `base'
			replace taks_`subj'_`base' = l`k'.taks_sd_scale_min_`subj' if year == `yr'
		}
	}

	* Stanford: all five subjects
	foreach subj in math read lang science socialstu {
		gen stanf_`subj'_`base' = l`default_lag'.stanford_sd_`subj'_scale
		forvalues yr = 2007/2009 {
			local k = `yr' - `base'
			replace stanf_`subj'_`base' = l`k'.stanford_sd_`subj'_scale if year == `yr'
		}
	}
}




compress

*LIMIT TO 2003 AND LATER
drop if year < 2003

*LIMIT TO GRADES 9 - 11
keep if inrange(grade, 9, 11)

* Retain identifiers, test scores (and their lags), demographics,
* enrollment terms and the 2003/2004 pre-period scores.
keep id campus year grade                          ///
     *taks_sd_scale_min* *stanford_sd_*_scale      ///
     ethnicity econdis atrisk speced lep gifted female ///
     enroll*                                       ///
     taks_*_2004* stanf_*_2004*                    ///
     taks_*_2003* stanf_*_2003*
compress

*MERGE IN STUDENT-TEACHER LINKS
* One student-campus-year row fans out to one row per class taken.
* Students without class records are kept; classes without a student are not.
sort id campus year
merge 1:m id campus year using "C:\D\Research\Charter\Houston\HISDdata\DataFiles\SecondGrades\secondary_classes.dta", keepusing(tch_number course_type course crs_title)
drop if _merge == 2


*ALLOW FOR DIFFERENT LAG VALUES BY GRADE & YEAR
* For every 2003-based pre-score, build one copy per (year, grade) cell that
* equals the score inside that cell and 0 elsewhere.
* The wildcard only matches names ending in _2003, so the variables created
* here (ending in _g9/_g10/_g11) are never picked up by the inner loop.
foreach yr in 2003 2004 2007 2008 2009 {
    forvalues gr = 9/11 {
        foreach score of varlist stanf_*_2003 taks_*_2003 {
            gen `score'_y`yr'_g`gr' = `score' * (year == `yr') * (grade == `gr')
        }
    }
}
compress

*MERGE IN ASPIRE COURSE LISTS --> WORKS WELL FOR 2006-07 & LATER BUT POORLY FOR BEFORE SO WILL NEED TO USE OLD METHOD FOR PRE-PERIOD
* Old-syntax match merge on course; only subject is brought in from the list.
* NOTE(review): old-syntax merge does not check key uniqueness or sortedness of
* the using file - confirm aspire_course_list_2009.dta is unique and sorted on course.
drop _merge
sort course
merge course using "C:\teacher free riding\aspire_course_list_2009.dta", keep(subject)

* Discard course-list rows that matched no student record, as every other
* merge in this file does. Such rows have a missing year, and in Stata a
* missing value satisfies "year >= 2006", so without this drop they would be
* assigned a course_type below and carried through the subsequent merges.
drop if _merge == 2

* Pre-2006: keep the course_type that came with the class records and ignore
* the ASPIRE subject. 2006 onward: rebuild course_type from the ASPIRE subject.
replace subject = "" if year <= 2005
replace course_type = "" if year >= 2006
replace course_type = "eng" if subject == "Language Arts" & year >= 2006
replace course_type = "read" if subject == "Reading" & year >= 2006
replace course_type = "math" if subject == "Math" & year >= 2006
replace course_type = "sci" if subject == "Science" & year >= 2006
replace course_type = "soc" if subject == "Social Studies" & year >= 2006

* Rows with no usable course type (including 2006+ courses absent from the
* ASPIRE list or listed under any other subject) are removed.
drop if course_type == ""


* course_type_a pools English and reading into a single "eng/read" category;
* it is the key used to merge the teacher percentage data.
gen course_type_a = course_type
replace course_type_a = "eng/read" if course_type == "eng"
replace course_type_a = "eng/read" if course_type == "read"

***MERGE IN TEACHER PERCENTAGE DATA
* Old-syntax match merge keyed on campus-year-grade-pooled subject-teacher.
* The master is sorted on the key first, as old-syntax merge requires; the
* using file must already be sorted on the same key.
* NOTE(review): presumably this is where students and share_students come
* from - confirm against grade_percentages.dta.
sort campus year grade course_type_a tch_number
drop _merge
merge campus year grade course_type_a tch_number using "C:\teacher free riding\grade_percentages.dta"
* Keep unmatched class rows, discard teacher rows with no class records.
drop if _merge == 2

*DROP TEACHERS WITH FEWER THAN 10 STUDENTS
* Rows where students is missing (e.g. unmatched in the merge above) are NOT
* dropped here, because Stata treats missing as larger than any number.
drop if students < 10

*MERGE IN DEPARTMENT SIZE
* Old-syntax match merge keyed on campus-year-grade.
* NOTE(review): presumably supplies the teachers_grade_* variables used
* further down - confirm against grade_departments.dta.
drop _merge
sort campus year grade
merge campus year grade using "C:\teacher free riding\grade_departments.dta"
drop if _merge == 2

/*
***OPTION TO MERGE IN SCHOOL LEVEL PERCENTAGES AND DEPT SIZES FOR 2006-07
sort campus year course_type tch_number
drop _merge
replace share_students = . if year == 2006
merge m:1 campus year course_type tch_number using "C:\teacher free riding\school_percentages.dta", update keepusing(share_students)
drop if _merge == 2

sort campus year
drop _merge
merge campus year using "C:\teacher free riding\school_departments.dta"
drop if _merge == 2
foreach subject in "eng" "math" "read" "sci" "soc" {
   replace teachers_grade_`subject' = teachers_school_`subject' if year == 2006
}
*/

*MERGE IN TEACHER ID LINKED OVER TIME DERIVED FROM NAMES (CONDITIONAL ON BEING IN HISD IN 2006-07 OR LATER)
*NOTE THAT ONLY A SUBSET OF TEACHERS CAN BE LINKED BACK BEFORE 2006-07
* tch_number is converted to numeric before the merge; destring leaves the
* variable unchanged if any value contains non-numeric characters.
* NOTE(review): merge m:m pairs rows within each tch_number by position
* rather than forming a true join, and the Stata manual advises against it.
* If teacher_linked_ids is unique on tch_number this should be m:1 - confirm
* the structure of the using file before changing it.
* The using file is given without a path, so it is read from the current
* working directory.
destring tch_number, replace
sort tch_number
drop _merge
merge m:m tch_number using teacher_linked_ids
drop if _merge == 2


*HISD HAD AN EXPERIMENTAL PROGRAM IN 2005-06 THAT WAS BASED ON INDIVIDUAL REWARDS, HENCE WE'LL DROP THIS YEAR
drop if year == 2005

*2006 IS BASED ON CAMPUS-WIDE DEPARTMENTAL AWARDS RATHER THAN GRADE LEVEL, SO DROP THIS YEAR TOO
drop if year == 2006

***ASSIGN WEIGHTS SO THAT EACH STUDENT HAS A VALUE OF 1 IN EACH SUBJECT
* duplicates tag returns the number of OTHER rows sharing the key, so adding 1
* gives the number of classes a student has in that subject, campus and year.
duplicates tag id year campus course_type, generate(classes_in_subject)
replace classes_in_subject = 1 + classes_in_subject
gen weight = 1 / classes_in_subject
drop if id == .


**GENERATE POST INDICATORS
* post flags 2006 and later; it is interacted with the teacher's student share
* and with every teachers_grade_* variable.
gen post = (year >= 2006)
gen post_share_students = post * share_students
foreach dept of varlist teachers_grade_* {
	gen post_`dept' = post * `dept'
}

* Campus-by-year identifier: campus code followed by the 4-digit year.
* NOTE(review): stored as float by default, which is exact only up to
* 16,777,216 - confirm campus codes are small enough for this to be safe.
gen school_year = campus * 10000 + year

**HISTOGRAMS
* Rebuild post for the plots: 0 = 2003/2004, 1 = 2007 and later, missing otherwise.
cap drop post
gen post = .
replace post = 0 if year == 2003 | year == 2004
replace post = 1 if year >= 2007 & year != .
sort post


*DROP TEACHERS WHO HAVE MORE THAN 80% SPECIAL EDUCATION OR LEP STUDENTS
* Shares are computed per teacher-campus-grade-year-course_type, the same cell
* that the data are later collapsed to. The earlier version grouped on subject,
* which is blank for every pre-2006 row; pre-period shares were therefore pooled
* across all of a teacher's course types while post-period shares were per
* subject. course_type is populated in both periods and maps one-to-one to
* subject in the post period, so post-period results are unchanged.
* NOTE(review): the original header said 90% but the code has always used .8;
* the comment now matches the code - confirm which threshold is intended.
* Cells whose share is missing are also dropped, because missing > .8 in Stata.
	egen spec_ed_share = mean(speced), by(tch_number campus grade year course_type)
	egen lep_share = mean(lep), by(tch_number campus grade year course_type)
	drop if spec_ed_share > .8
	drop if lep_share > .8

keep if grade >= 9 & grade <= 11

*COLLAPSE TO ONE OBSERVATION PER TEACHER-GRADE
* Only regular high schools are kept; each teacher-campus-grade-year-course_type
* cell is reduced to its mean student share.
keep if regular_hs == 1
collapse (mean) share_students, by(tch_number campus grade year course_type)

* collapse discards post, so rebuild it and label the two periods for the panels.
gen post = year > 2006
sort post
set scheme s2color
label define period 0 "2003-04 to 2004-05" 1 "2007-08 to 2008-09"
label values post period

* Panel title for each course type.
local panel_math "Math"
local panel_eng  "English Language Arts"
local panel_sci  "Science"
local panel_soc  "Social Studies"

* For each subject: pre/post histogram with kernel density overlay, exported
* to <subject>_dist.eps, followed by detailed summary statistics by period.
foreach ct in math eng sci soc {
	hist share_students if course_type == "`ct'", ///
		by(post, title("`panel_`ct''") graphregion(icolor(white) color(white)) legend(off) note("")) ///
		subtitle(, bcolor(white)) xtitle("Share of Students in Grade") ///
		kdensity kdenopts(bwidth(0.03)) width(0.03)
	graph export `ct'_dist.eps, as(eps) replace
	by post: sum share_students if course_type == "`ct'", detail
}


cap log close
	
